feat: Google GenAI - add FileContent support + refactoring#2860
feat: Google GenAI - add FileContent support + refactoring#2860
Conversation
| elif isinstance(content_part, (ImageContent, FileContent)): | ||
| cls_name = content_part.__class__.__name__ | ||
|
|
||
| if not message.is_from(ChatRole.USER): | ||
| msg = f"{cls_name} is only supported for user messages" | ||
| raise ValueError(msg) | ||
|
|
||
| # Validate image MIME type and format | ||
| if not content_part.mime_type: | ||
| msg = f"Mime type is required to use {cls_name} with GoogleGenAIChatGenerator" | ||
| raise ValueError(msg) | ||
|
|
||
| if ( | ||
| isinstance(content_part, ImageContent) | ||
| and content_part.mime_type not in GOOGLE_GENAI_SUPPORTED_MIME_TYPES | ||
| ): | ||
| supported_types = list(GOOGLE_GENAI_SUPPORTED_MIME_TYPES.keys()) | ||
| msg = ( | ||
| f"Unsupported image MIME type: {content_part.mime_type}. " | ||
| f"Google AI supports the following MIME types: {supported_types}" | ||
| ) | ||
| raise ValueError(msg) | ||
|
|
||
| # Use inline data approach | ||
| try: | ||
| base64_data = ( | ||
| content_part.base64_data if isinstance(content_part, FileContent) else content_part.base64_image | ||
| ) | ||
| bytes_data = base64.b64decode(base64_data) | ||
|
|
||
| # Create Part using from_bytes method | ||
| file_part = types.Part.from_bytes(data=bytes_data, mime_type=content_part.mime_type) | ||
| parts.append(file_part) | ||
|
|
||
| except Exception as e: | ||
| msg = f"Failed to process {cls_name} data: {e}" | ||
| raise RuntimeError(msg) from e |
There was a problem hiding this comment.
this is the new part about FileContent (handled jointly with ImageContent)
| def test_convert_message_to_google_genai_file_content(self, test_files_path): | ||
| file_path = test_files_path / "sample_pdf_3.pdf" | ||
| file_content = FileContent.from_file_path(file_path) | ||
| message = ChatMessage.from_user(content_parts=["Describe this document:", file_content]) | ||
| google_content = _convert_message_to_google_genai_format(message) | ||
| assert google_content.role == "user" | ||
| assert len(google_content.parts) == 2 | ||
|
|
||
| assert google_content.parts[0].text == "Describe this document:" | ||
| assert hasattr(google_content.parts[1], "inline_data") | ||
| assert google_content.parts[1].inline_data.mime_type == "application/pdf" | ||
| assert google_content.parts[1].inline_data.data is not None | ||
|
|
||
| def test_convert_message_to_google_genai_file_content_in_assistant_message(self, test_files_path): | ||
| file_path = test_files_path / "sample_pdf_3.pdf" | ||
| file_content = FileContent.from_file_path(file_path) | ||
| message = ChatMessage.from_assistant("This is a document") | ||
| message._content.append(file_content) | ||
|
|
||
| with pytest.raises(ValueError, match="FileContent is only supported for user messages"): | ||
| _convert_message_to_google_genai_format(message) |
| def test_live_run_with_file_content(self, test_files_path): | ||
| pdf_path = test_files_path / "sample_pdf_3.pdf" | ||
|
|
||
| file_content = FileContent.from_file_path(file_path=pdf_path) | ||
|
|
||
| chat_messages = [ | ||
| ChatMessage.from_user( | ||
| content_parts=[file_content, "Is this document a paper about LLMs? Respond with 'yes' or 'no' only."] | ||
| ) | ||
| ] | ||
|
|
||
| generator = GoogleGenAIChatGenerator() | ||
| results = generator.run(chat_messages) | ||
|
|
||
| assert len(results["replies"]) == 1 | ||
| message: ChatMessage = results["replies"][0] | ||
|
|
||
| assert message.is_from(ChatRole.ASSISTANT) | ||
|
|
||
| assert message.text | ||
| assert "no" in message.text.lower() |
There was a problem hiding this comment.
I can imagine this failing because someting changes and the LLM gives variations of a negation, maybe you we could something like this:
indicates_no = any(
phrase in message.text.lower()
for phrase in (
"no",
"nope",
"not about",
"not a paper about",
"it is not",
"it's not",
"the answer is no",
"does not",
"doesn't",
"negative",
)
)
assert indicates_no is True|
This is going to take a while to review. It would be easier to have this split into two separate PRs, one for the new feature and another for the refactoring. |
Yes, you are right. Sorry about that. The only new logic is commented above. The rest is just code moved into separate modules. If it's okay with you, it would be great to review it as-is. If it turns out to be difficult to review this way, I can split it into two separate PRs. |
|
I can review it as is, no worries. |
davidsbatista
left a comment
There was a problem hiding this comment.
few small changes/suggestions
| @@ -830,16 +257,16 @@ def _handle_streaming_response( | |||
|
|
|||
| chunk = None | |||
There was a problem hiding this comment.
unrelated to the PR - but this chunk = None is never used since the chunk in the loop will always bind to what comes from the response_stream
|
|
||
|
|
||
| @component | ||
| class GoogleGenAIChatGenerator: |
There was a problem hiding this comment.
WE can extend the docstring by saying that user messages can include files (e.g. PDFs) via FileContent. Maybe also add a short example.
| msg = f"{cls_name} is only supported for user messages" | ||
| raise ValueError(msg) | ||
|
|
||
| # Validate image MIME type and format |
There was a problem hiding this comment.
nit: consider validation against supported image types instead of only format
| def test_live_run_with_file_content(self, test_files_path): | ||
| pdf_path = test_files_path / "sample_pdf_3.pdf" | ||
|
|
||
| file_content = FileContent.from_file_path(file_path=pdf_path) | ||
|
|
||
| chat_messages = [ | ||
| ChatMessage.from_user( | ||
| content_parts=[file_content, "Is this document a paper about LLMs? Respond with 'yes' or 'no' only."] | ||
| ) | ||
| ] | ||
|
|
||
| generator = GoogleGenAIChatGenerator() | ||
| results = generator.run(chat_messages) | ||
|
|
||
| assert len(results["replies"]) == 1 | ||
| message: ChatMessage = results["replies"][0] | ||
|
|
||
| assert message.is_from(ChatRole.ASSISTANT) | ||
|
|
||
| assert message.text | ||
| assert "no" in message.text.lower() |
There was a problem hiding this comment.
I can imagine this failing because someting changes and the LLM gives variations of a negation, maybe you we could something like this:
indicates_no = any(
phrase in message.text.lower()
for phrase in (
"no",
"nope",
"not about",
"not a paper about",
"it is not",
"it's not",
"the answer is no",
"does not",
"doesn't",
"negative",
)
)
assert indicates_no is True
Related Issues
FileContent#2828Proposed Changes:
FileContentHow did you test it?
CI
Notes for the reviewer
Unfortunately, the PR is big because I'm moving code around.
In this PR, I've commented the code where the new feature is implemented to make it more visible.
Checklist
fix:,feat:,build:,chore:,ci:,docs:,style:,refactor:,perf:,test:.